import pandas as pd
import numpy as np

# Load the tab-separated proteome table; the first column of the file becomes
# the row index.
input_data = pd.read_csv(
    'CPTAC2_Breast_Prospective_Collection_BI_Proteome.tmt10.tsv',
    sep='\t',
    index_col=0,
)

# Keep only the columns whose header contains the substring 'Unshared'.
unshared_col = list(filter(lambda header: 'Unshared' in header, input_data.columns))
unshared_data = input_data.loc[:, unshared_col]

# NOTE(review): the first three rows are treated as non-measurement rows; the
# third is presumably a per-column standard deviation (going by the variable
# name) -- confirm against the file layout.
stddevs = unshared_data.iloc[2]
# Everything from the fourth row onwards is used as the actual value matrix.
unshared_values = unshared_data.iloc[3:]

# Specimen sheet, read with pandas' default Excel settings.
speciments = pd.read_excel('S039_Breast_Cancer_Prospective_Collection_Specimens_r1.xlsx')

# Parallel lists: new_cols_label[k] is a column of unshared_values to keep and
# new_cols_sample[k] is the name that column receives afterwards.
new_cols_label = []
new_cols_sample = []

specimen_rows = zip(
    speciments['Specimen Label'],
    speciments['Sample Type'],
    speciments['Participant Protocol Identifier : Collection Protocol Registration'],
)
for specimen_label, sample_type, participant in specimen_rows:
    # Specimens without a participant identifier are named after their label.
    if pd.isna(participant):
        participant = specimen_label
    # Only tumor specimens contribute columns.
    if sample_type != 'Tumor':
        continue
    # A column belongs to this specimen when its header contains the first
    # space-separated token of the specimen label (substring match).
    matched = [
        header
        for header in unshared_values.columns
        if specimen_label.split(' ')[0] in header
    ]
    new_cols_label.extend(matched)
    new_cols_sample.extend('BRCA.' + participant for _ in matched)

# Pull the matched columns and relabel them with the 'BRCA.'-prefixed names.
new_profiles = unshared_values[new_cols_label]
new_profiles.columns = new_cols_sample

# duplicated() flags every repeat of a column name after its first occurrence,
# so only the first column per name survives.
keep_first = ~new_profiles.columns.duplicated()
unique_profiles = new_profiles.loc[:, keep_first]
#
# print(unique_profiles.median())
# print(unique_profiles)
# print("-------")
# Two-pass median centring and scaling of unique_profiles.
#
# Pass 1 (per column): subtract the column median, then divide by the
# root-mean-square deviation from that median.
f1 = unique_profiles - unique_profiles.median()
# DataFrame.sum() skips NaN, so the mean of squares has to be taken over the
# non-missing entries only (count()). Dividing by the full row count would
# shrink the scale of every column that contains missing values. When nothing
# is missing, count() equals the row count and the result is unchanged.
st = np.sqrt(np.square(f1).sum() / f1.count())
f2 = f1 / st

# Pass 2 (per row of f2): transposing turns rows into columns so the same
# column-wise median / RMS reductions can be reused.
f3 = f2.T - f2.T.median()
st1 = np.sqrt(np.square(f3).sum() / f3.count())
# Transpose back so f4 has the same orientation as unique_profiles.
f4 = (f3 / st1).T

# f4.to_csv('breast.csv')
# print(f2)
